#!/usr/bin/env python3
'''
Author: Paschalis Agapitos
Project: Mestizajes

Provides date preprocessing utilities for historical biographical data,
including extraction of years from date strings, conversion of years to
century values (with BCE/CE handling), and helpers for cleaning and sorting
century representations used in downstream analysis.
'''

import pandas as pd

def get_centuries(year):
    '''
    Convert a year into a signed century number.

    Positive (CE) years map to positive centuries (1-100 -> 1, 101-200 -> 2),
    negative (BCE) years map to the mirrored negative centuries
    (-1 to -100 -> -1, -101 to -200 -> -2).

    Args:
        year: Anything accepted by ``int()``; missing values (as detected by
            ``pd.isna``) are allowed.

    Returns:
        The century as an ``int``, or ``None`` when ``year`` is missing.
    '''
    if pd.isna(year):
        return None

    value = int(year)
    if value == 0:
        # There is no year 0; such (incorrect) dates are filed under the
        # 1st century CE.
        return 1

    # Same 1-based bucket arithmetic for both eras; only the sign differs.
    magnitude = (abs(value) - 1) // 100 + 1
    return magnitude if value > 0 else -magnitude

# def extract_year(date_str):
#     try:
#         if isinstance(date_str, str):
#             # Split by '-' and look for the year part
#             parts = date_str.split('-')
#             if len(parts) > 1 and parts[0] == '':  # Handle negative dates like '-428-01-01'
#                 return -int(parts[1])  # Extract the year as a negative integer
#             elif len(parts[0]) > 4:  # Handle cases like '19931003-01-01'
#                 return int(parts[0][:4])  # Extract the first 4 digits as the year
#             elif len(parts[0]) > 0:  # Handle positive dates like '1976-09-25'
#                 return int(parts[0])
#         return None  # For invalid input
#     except (ValueError, IndexError):
#         # Handle cases where date_str is not properly formatted
#         return None

def extract_year(date_str):
    '''
    Extract the (possibly negative) year from a '-'-separated date string.

    Examples: '1976-09-25' -> 1976, '-428-01-01' -> -428.

    Args:
        date_str: The date text. Non-string values are treated as missing.

    Returns:
        The year as an ``int``, or ``None`` when ``date_str`` is not a
        string, is empty/whitespace-only, or is one of the placeholder
        strings 'None' / 'not a date'.

    Raises:
        ValueError: If the leading component of a non-empty string is not an
            integer literal (e.g. 'abc-01-01').

    Note:
        The whole leading component is converted, so an over-long component
        such as '19931003-01-01' yields 19931003 rather than a 4-digit year.
    '''
    # Type-check before any comparison: values such as pd.NA raise TypeError
    # when compared for membership against the placeholder strings.
    if not isinstance(date_str, str):
        return None

    # Blank strings carry no date, just like the explicit placeholders.
    if not date_str.strip() or date_str in ("None", "not a date"):
        return None

    if date_str.startswith("-"):
        # A leading '-' is the sign of a BCE year, not a field separator.
        return int("-" + date_str[1:].split("-")[0])
    return int(date_str.split("-")[0])

def remove_leading_zero(date_str):
    '''
    Strip all leading '0' characters from a string.

    Every leading zero is removed, not just the first one, so a string made
    up solely of zeros becomes the empty string.

    Args:
        date_str: The value to clean.

    Returns:
        The string without its leading zeros; any non-string value, or a
        string that does not start with '0', is returned unchanged.
    '''
    has_zero_prefix = isinstance(date_str, str) and date_str.startswith('0')
    return date_str.lstrip('0') if has_zero_prefix else date_str

def century_to_sortable(century):
    '''
    Turn a '<number> <era>' century label into a signed integer sort key.

    The label is split on whitespace; the first token is the century number
    and the second is the era. Only the exact era 'BCE' negates the number;
    any other era token leaves it positive.

    Args:
        century: Label such as '5 BCE' or '19 CE'.

    Returns:
        The negated number for 'BCE' labels, otherwise the number itself.

    Raises:
        ValueError: If the first token is not an integer literal.
        IndexError: If the label has fewer than two whitespace-separated
            tokens.
    '''
    tokens = century.split()
    magnitude = int(tokens[0])
    if tokens[1] == 'BCE':
        return -magnitude
    return magnitude